slugs <- read.table( 'http://www.bio.ic.ac.uk/research/mjcraw/statcomp/data/slugsurvey.txt', header=TRUE) slug.table <- table(slugs$slugs, slugs$field) slugtable <- data.frame(table(slugs$slugs, slugs$field)) # numerical version of count variable slugtable$Var1.num <- as.numeric(as.character(slugtable$Var1)) # single mean Poisson model negative log-likelihood negpois1.LL<-function(p){ LL<-sum(log(dpois(slugs$slugs, lambda=p))) -LL } # separate means Poisson log-likelihood pois2.LL <- function(p){ mu <- p[1] + p[2]*(slugs$field=="Rookery") LL <- sum(log(dpois(slugs$slugs, lambda=mu))) LL } # negative log-likelihood negpois2.LL <- function(p){ -pois2.LL(p) } # obtain estimates using nlm out.pois2 <- nlm(negpois2.LL, c(1.2,1)) # obtain estimates using glm out2 <- glm(slugs~field, data=slugs, family=poisson) # add predicted means to data frame slugtable$mu <- predict(out2, type='response', newdata=data.frame(field=slugtable$Var2)) # calculate Poisson probabilities slugtable$p <- dpois(slugtable$Var1.num, lambda=slugtable$mu) # add tail probability to X = 10 category slugtable$p2 <- dpois(slugtable$Var1.num, lambda=slugtable$mu)+ppois(10, lambda=slugtable$mu, lower.tail=F)*(slugtable$Var1.num==10) # count the number of observations in each field type n <- table(slugs$field) # calculate expected counts slugtable$exp <- slugtable$p2 * n[as.numeric(slugtable$Var2)] # graph Poisson model and raw data library(lattice) xyplot(Freq~Var1.num|Var2, data=slugtable, xlab='Count category', panel=function(x, y, subscripts) { panel.xyplot(x, y, type='h', lineend=1, col='grey', lwd=10) panel.points(x, slugtable$exp[subscripts], pch=16, cex=.6, col=1, type='o') }) # split probabilities by field type slug.p <- split(slugtable$p2, slugtable$Var2) # simulate data for field=nursery rmultinom(1, size=n[1], prob=slug.p[[1]]) # simulate data for field=rookery rmultinom(1, size=n[2], prob=slug.p[[2]]) # Obtain simulations from both nursery and rookery sapply(1:2, function(x) rmultinom(1, size=n[x], prob=slug.p[[x]])) # convert matrix to a vector and save results out.obs <- as.vector(sapply(1:2, function(x) rmultinom(1, size=n[x], prob=slug.p[[x]]))) # calculate Pearson statistic using both field results sum((out.obs-slugtable$exp)^2/slugtable$exp) # write things as a function that can be replicated myfunc <- function() { as.vector(sapply(1:2, function(x) rmultinom(1,size=n[x],prob=slug.p[[x]])))->out.obs sum((out.obs-slugtable$exp)^2/slugtable$exp) } myfunc() # set random seed set.seed(23) # obtain 9999 simulated Pearson statistics replicate(9999, myfunc())->out.pearson max(out.pearson) # actual Pearson statistic actual <- sum((slugtable$Freq-slugtable$exp)^2/slugtable$exp) actual #obtain p-value out.pearson.all<-c(out.pearson,actual) sum(out.pearson.all>=actual)/length(out.pearson.all) # what terms are causing the Pearson statistic to be so large: category X >=10 in nursery round((slugtable$Freq-slugtable$exp)^2/slugtable$exp,3) # graphing the log-likelihood surface ?persp g <- expand.grid(b0=seq(0.5,2,.05), b1=seq(0.5,2,.05)) # calculate log-likelihood on a grid of possible values g$z <- apply(g, 1, pois2.LL) # arrange z-coordinates in matrix to match x-y grid of values z.matrix <- matrix(g$z, nrow=length(seq(0.5,2,.05))) # graph surface persp(seq(0.5,2,.05), seq(0.5,2,.05), z.matrix, xlab='b0', ylab='b1', zlab='log-likelihood') # add tick marks persp(seq(0.5,2,.05), seq(0.5,2,.05), z.matrix, xlab='b0', ylab='b1', zlab='log-likelihood', ticktype="detailed") # rotate around z-axis (theta) and in vertical plane (phi) persp(seq(0.5,2,.05), seq(0.5,2,.05), z.matrix, xlab='b0', ylab='b1', zlab='log-likelihood', ticktype="detailed", theta=30, phi=30) # save surface coordinates out.persp <- persp(seq(0.5,2,.05), seq(0.5,2,.05), z.matrix, xlab='b0', ylab='b1', zlab='log-likelihood', ticktype="detailed", theta=30, phi=30) # obtain coordinates of projection of MLE onto graph trans3d(out.pois2$estimate[1], out.pois2$estimate[2], -out.pois2$minimum, out.persp) # add maximum likelihood estimate to the graph points(trans3d(out.pois2$estimate[1], out.pois2$estimate[2], -out.pois2$minimum, out.persp), pch=16, col=2) # model 1: single mean negative binomial model negNB.LL <- function(p) { mu <- p[1] theta <- p[2] LL <- sum(log(dnbinom(slugs$slugs, mu=mu, size=theta))) -LL } negNB.LL(c(2,2)) negNB.LL(c(2,4)) # model 2: two means negative binomial model negNB.LL2 <- function(p) { mu <- p[1] + p[2]*(slugs$field=='Rookery') theta <- p[3] LL <- sum(log(dnbinom(slugs$slugs, mu=mu, size=theta))) -LL } negNB.LL2(c(2,1,2)) # obtain estimates out.NB1 <- nlm(negNB.LL, c(2,1)) out.NB1 out.NB2 <- nlm(negNB.LL2, c(2,1,1)) out.NB2 # fit model 1 using glm.nb function library(MASS) out.glm1 <- glm.nb(slugs~1, data=slugs) summary(out.glm1) out.NB1$estimate exp(coef(out.glm1)) # fit model 2 using glm.nb function out.glm2 <- glm.nb(slugs~field, data=slugs) summary(out.glm2) out.NB2$estimate # nursery mean exp(coef(out.glm2)[1]) # rookery mean exp(coef(out.glm2)[1]+coef(out.glm2)[2]) # Poisson models: common mean and separate means models out.glm2a <- glm(slugs~field, data=slugs, family=poisson) out.glm1a <- glm(slugs~1, data=slugs, family=poisson) # obtain log-likelihoods of models sapply(list(out.glm1, out.glm2, out.glm1a, out.glm2a), logLik) # Wald test of field effect summary(out.glm2) # likelihood ratio test of field effect anova(out.glm1, out.glm2, test='Chisq')